import json
import os
import gzip

path_openalex = '../'

## Input:
## 1. OpenAlexID_ReferencedPaperIDs.txt (17 G)
##    One tab-separated line per paper: the paper's own ID first, followed by
##    the IDs of the papers it references.

## Output:
## 1. OpenAlexID_CitingPaperIDs.txt (17 G)
##    One tab-separated line per referenced paper: its ID first, followed by
##    the IDs of the papers that cite it (in input order).


# Inverted index: referenced paper ID -> list of IDs of the papers citing it.
# NOTE: the complete index is built in memory before anything is written.
cit_dict = {}
n = 0  # number of input lines read so far
with open(os.path.join(path_openalex, 'OpenAlexID_ReferencedPaperIDs.txt'), 'r') as f:
    for line in f:
        n += 1
        if n % 1000000 == 0:
            print(n/1000000)  # progress indicator, in millions of lines
        foc, *ref = line.strip('\n').split('\t')
        if not foc:
            # Blank or malformed line without a citing-paper ID: nothing to
            # attribute the references to.
            continue
        for r in ref:
            if not r:
                # An empty field (e.g. "ID\t\n" for a paper without references)
                # would otherwise create a bogus '' key in the index.
                continue
            cit_dict.setdefault(r, []).append(foc)

# Number of distinct papers that are referenced at least once.
print(len(cit_dict))

with open(os.path.join(path_openalex, 'OpenAlexID_CitingPaperIDs.txt'), 'w') as f:
    for p, citing in cit_dict.items():
        f.write(p+'\t'+'\t'.join(citing)+'\n')
